*Declare the interpreter version first so that every later command, including
*-set seed-, is run under Stata 18 rules.
version 18

*Fix the random-number seed. This is issued after -version- because the
*random-number generator associated with a seed is determined by the version
*in effect when the seed is set.
set seed 122334455

*The path below is built from the global macro dropbox, which must be defined
*before this do-file is run.
cd "$dropbox/Oasis/2. Pathways to Choice/8.2 Replication" // change to local directory; this should be the same directory where the survey_data.dta file is stored

********************************************************************************

*Table of Contents:
	*1. Generate indices
		*1.1 Social support index - baseline and endline
		*1.2 Self perception index - baseline and endline
		*1.3 Gender equitable beliefs index - baseline and endline
		*1.4 Marriage self-advocacy index - baseline and endline
		*1.5 Socio-economic index - baseline only
		*1.6 Take z scores of all indices
		*1.7 Label all indices
	*2. Calculate average age of others
	*3. Deal with missing baseline data
	*4. Demean control variables
	*5. Generate interactions
	*6. Save final dataset

********************************************************************************

*Load the survey data from the working directory set above, replacing anything in memory
use "survey_data.dta", clear

********************************************************************************

*1. Generate indices
*This section of the code combines variables to create the indices used in the final analysis
*Each index involves generating z-scores for each item in the index, then taking the mean of the z-scores

*Helper program: zscore_index
*	Syntax:   zscore_index varlist, generate(newvar)
*	Purpose:  for every variable in varlist, compute a z-score using that
*	          variable's full-sample mean and standard deviation, then store
*	          the row mean of those z-scores in newvar.
*	Notes:    - the z-scores live in temporary variables, so nothing named z_*
*	            is created in, read from, or dropped from the dataset
*	          - egen rowmean() ignores missing items, so newvar is missing only
*	            when every item is missing for that observation
*	          - stops with an error if newvar already exists
capture program drop zscore_index
program define zscore_index
	syntax varlist(numeric), GENerate(name)
	confirm new variable `generate'

	local zitems
	foreach item of local varlist {
		tempvar z
		summarize `item'
		local mu = r(mean)
		local sigma = r(sd)
		generate `z' = (`item' - `mu')/`sigma'
		local zitems `zitems' `z'
	}
	egen `generate' = rowmean(`zitems')
end

*1.1 Social support index - baseline and endline
*baseline:
zscore_index social*_str_base, generate(social_index_base)

*endline:
zscore_index social*_str, generate(social_index)

*1.2 Self perception index - baseline and endline
*baseline:
zscore_index self*_str_base, generate(self_index_base)

*endline:
zscore_index self*_str, generate(self_index)

*1.3 Gender equitable beliefs index - baseline and endline
*baseline:
*first, recode to reverse (so a higher value means agreement with a more gender equitable norm)
foreach item of varlist norm_soneduc_base - norm_fatherchoose_base {
	generate rvs_`item' = 1 - `item'
}
zscore_index rvs_norm*, generate(norm_womanpos_base)
*the reversed items must be dropped here so the endline rvs_norm* wildcard below only picks up endline items
drop rvs*

*endline:
foreach item of varlist norm_soneduc - norm_fatherchoose {
	generate rvs_`item' = 1 - `item'
}
zscore_index rvs_norm*, generate(norm_womanpos)
drop rvs*

*1.4 Marriage self-advocacy index - baseline and endline
*baseline:
zscore_index discussmarriage_base delaymarriage_base discussmarr_recself_base ///
	discussmarr_planself_base discussmarr_recotr_base discussmarr_planotr_base, ///
	generate(marriage_adv_index_base)

*endline:
zscore_index discussmarriage delaymarriage discussmarr_recself ///
	discussmarr_planself discussmarr_recotr discussmarr_planotr, ///
	generate(marriage_adv_index)

*1.5 Socio-economic index - baseline only

*first, generate asset index
zscore_index asset_*_base, generate(asset_index_base)

*then create index (the asset index enters as one of four components)
zscore_index mother_literate_base father_literate_base father_nonag_base asset_index_base, ///
	generate(socioecon_index_base)

*1.6 Take z scores of all indices
*Each index is re-standardized using the mean and standard deviation of the
*observations with treated==0, and stored as <index>_z (endline) or <index>_z_base (baseline)

*indices measured at both baseline and endline
foreach idx in self_index social_index norm_womanpos marriage_adv_index {
	*baseline version
	summarize `idx'_base if treated==0
	local ctrl_mean = r(mean)
	local ctrl_sd = r(sd)
	generate `idx'_z_base = (`idx'_base - `ctrl_mean')/`ctrl_sd'

	*endline version
	summarize `idx' if treated==0
	local ctrl_mean = r(mean)
	local ctrl_sd = r(sd)
	generate `idx'_z = (`idx' - `ctrl_mean')/`ctrl_sd'
}

*index measured at baseline only
summarize socioecon_index_base if treated==0
local ctrl_mean = r(mean)
local ctrl_sd = r(sd)
generate socioecon_index_z_base = (socioecon_index_base - `ctrl_mean')/`ctrl_sd'

*keep only the standardized versions (asset_index_base is not dropped)
drop self_index social_index norm_womanpos marriage_adv_index ///
	self_index_base social_index_base norm_womanpos_base marriage_adv_index_base ///
	socioecon_index_base

*1.7 Label all indices

*endline indices
label variable self_index_z         "Self Perception Index"
label variable social_index_z       "Social Support Index"
label variable norm_womanpos_z      "Gender Equitable Beliefs Index"
label variable marriage_adv_index_z "Marriage Self-Advocacy Index"

*baseline indices
label variable self_index_z_base         "Self Perception Index"
label variable social_index_z_base       "Social Support Index"
label variable norm_womanpos_z_base      "Gender Eq Beliefs (sd)"
label variable marriage_adv_index_z_base "Marriage Self-Advocacy Index"

*baseline-only indices
label variable asset_index_base       "Asset index"
label variable socioecon_index_z_base "Socioeconomic (sd)"

********************************************************************************

*2. Calculate average age of others
*For each girl, the mean baseline age of the other girls in her community
*(a leave-one-out mean); girls whose own age is missing get the mean over all
*girls in the community with a recorded age.

*Add up all ages (minus own age) in community
*egen total() treats missing ages as 0, so total_age sums the recorded ages only
bys community_id: egen total_age = total(age_base)
gen total_age_minus_one = total_age - age_base

*Calculate number of girls in community and number of girls minus one
*age_data flags girls with a recorded age; missing() covers . and .a-.z alike
gen age_data = 1 if !missing(age_base)
bys community_id: egen nr_girls = total(age_data)
gen nr_girls_minus_one = nr_girls - age_data

*Divide the summed ages of the other girls by the number of other girls
*(result is missing when a girl is the only one in her community with a recorded age)
gen avg_ageofothers_base = total_age_minus_one/nr_girls_minus_one
*Girls with a missing own age have nobody to leave out: use the plain community mean.
*missing() is used instead of ==. so that extended missing values also get this fallback,
*matching the definition of age_data above.
replace avg_ageofothers_base = total_age/nr_girls if missing(age_base)

label var avg_ageofothers_base "Average age of others"

drop total_age total_age_minus_one age_data nr_girls nr_girls_minus_one

********************************************************************************

*3. Deal with missing baseline data
*For every baseline variable listed below:
*	- create <var>_miss, equal to 1 when the variable is missing and 0 otherwise
*	- replace the missing values of the variable with 0
*	- label the dummy with the variable's label plus " - Missing dummy"

local outcomes unmarried_base attend_school_base self_index_z_base social_index_z_base nr_friends_base norm_womanpos_z_base timespent_school_base terms_current_base completed_years_base ysis_currschool_base ysis_shareschool_base ybro_currschool_base ybro_shareschool_base want_highered_base agetomarry_base marriage_adv_index_z_base anywork_lastyear_base age_base socioecon_index_z_base ever_school_base no_endline_base
foreach var of local outcomes {
	*missing() is true for the system missing value (.) and for extended missing
	*values (.a-.z); a test of ==. would leave extended missings flagged as
	*non-missing and still missing in the data
	gen `var'_miss = missing(`var')
	replace `var' = 0 if missing(`var')
	local lbl : variable label `var'
	label var `var'_miss "`lbl' - Missing dummy"
}

********************************************************************************
	
*4. Demean control variables

*loop over variables
	*for each variable, summ the variable to generate the control mean
	*generate a demeaned version of the variable, which is the original variable minus the control mean
	*do the same for the variable's missing dummy, when one exists

*baseline variables
local baseline_vars unmarried_base attend_school_base self_index_z_base social_index_z_base nr_friends_base norm_womanpos_z_base timespent_school_base terms_current_base completed_years_base ysis_currschool_base ysis_shareschool_base ybro_currschool_base ybro_shareschool_base want_highered_base agetomarry_base marriage_adv_index_z_base anywork_lastyear_base age_base socioecon_index_z_base ever_school_base
foreach var of local baseline_vars {

		*variable: subtract the mean among observations with treated==0
		summ `var' if treated==0
		gen `var'_d = `var' - `r(mean)'
		local lbl : variable label `var'
		label var `var'_d "`lbl'"

		*missing dummy: demean the dummy itself (not the underlying variable).
		*The existence check replaces blanket -capture- prefixes, which would hide
		*real errors and could reuse r(mean) left over from the summ above.
		capture confirm variable `var'_miss, exact
		if !_rc {
			summ `var'_miss if treated==0
			gen `var'_miss_d = `var'_miss - `r(mean)'
			local lbl : variable label `var'_miss
			label var `var'_miss_d "`lbl'"
		}
}

*variables for interactions
*note: these are demeaned with the full-sample mean (no treated==0 restriction)
summ avg_ageofothers_base
gen avg_ageofothers_d = avg_ageofothers_base - `r(mean)'
label var avg_ageofothers_d "Mean Age of Group Mates"

summ nr_samefather
gen nr_samefather_d = nr_samefather - `r(mean)'
label var nr_samefather_d "Nr Girls with Same Father"

summ nr_sibgirls
gen nr_sibgirls_d = nr_sibgirls - `r(mean)'
label var nr_sibgirls_d "Nr Girl Siblings"

********************************************************************************

*5. Generate interactions
*Each interaction is the product of two variables; within every group below the
*variables are created first and labelled afterwards.

*Treatment x demeaned mean age of the other girls in the community
generate avg_age_treated = avg_ageofothers_d*treated
label variable avg_age_treated "Int x Mean Age of Others"

*Treatment x demeaned sibling counts
generate nr_samefather_treated = nr_samefather_d*treated
generate nr_sibgirls_treated = nr_sibgirls_d*treated

label variable nr_samefather_treated "Int x Nr Girls with Same Father"
label variable nr_sibgirls_treated "Int x Nr Girl Siblings"

*Treatment x intermediate outcomes
generate treated_norm = treated*norm_womanpos_z
generate treated_school = treated*attend_school
generate treated_self = treated*self_index_z
generate treated_social = treated*social_index_z

label variable treated_norm "Intervention x Empowerment Beliefs Index"
label variable treated_school "Intervention x Girl Attends School (d)"
label variable treated_self "Intervention x Self-Perception Index"
label variable treated_social "Intervention x Social Support Index"

*School attendance x baseline indices
generate attend_school_norm = attend_school*norm_womanpos_z_base
generate attend_school_self = attend_school*self_index_z_base
generate attend_school_social = attend_school*social_index_z_base

label variable attend_school_norm "Attend School x Empowerment Beliefs Index"
label variable attend_school_self "Attend School x Self-Perception Index"
label variable attend_school_social "Attend School x Social Support Index"

********************************************************************************

*6. Save final dataset
*Written to the working directory; an existing file with this name is overwritten

save "data_for_analysis.dta", replace
